---
title: "PAP Analysis for Disrupting Compliance: The Impact of a Randomized Tax Holiday in Uruguay"
output: pdf_document
date: "2023-05-06"
---

```{r setup, include=TRUE, fig.height=4.5, warning=FALSE}
################################################################################
# PAP 1 & 2 code execution
# Last revised April 2023
################################################################################

# Basic setup --
rm(list=ls())
set.seed(1234) 
options(scipen=99999, digits=3)


##############################################################
message("required libraries, setwd & load functions")

# Load/install packages --
if (!require("pacman")) install.packages("pacman")
pacman::p_load(
  plyr,
  ggplot2,
  reshape2,
  zoo,
  sandwich,
  AER,
  xtable,
  stats,
  tidyr,
  dplyr,
  weights,
  estimatr
)

conflicted::conflicts_prefer(dplyr::filter)

#Set wd ---
home <-dirname(rstudioapi::getActiveDocumentContext()$path)
setwd(home)

#load functions ---
source("t_test.R")

################################################################################
# PAP 1 - NATURAL AND FIELD EXPERIMENTS

message("load data")

load("panel_taxtime.Rda")
load("naturalex_debt_gtp.Rda")
load("fieldex_data.Rda")

##############################################################
message("Clean & add new variables")

# switch tax names to english
taxes_panel$tax <- taxes_panel$TRIBUTO
taxes_panel$tax <- as.factor(taxes_panel$tax)
levels(taxes_panel$tax) <-  c("Property", "Vehicle", "Sewage", "Head")

# check when the holiday takes place for each tax
holiday <- taxes_panel %>% dplyr::filter(cuota_exonerada==1) %>% group_by(tax) %>%
  dplyr::summarise(
    holiday_start = min(t),
    holiday_end = max(t)
  )

holiday

# For the treatment group, we replace with NAs the observations under the holiday
taxes_panel <- taxes_panel %>% inner_join(holiday)
taxes_panel$en_fecha[taxes_panel$TREATMENT==1 &
                       taxes_panel$t>=taxes_panel$holiday_start &
                       taxes_panel$t<=taxes_panel$holiday_end] <- NA
taxes_panel$nr_paymntsowed[taxes_panel$TREATMENT==1 &
                               taxes_panel$t>=taxes_panel$holiday_start &
                               taxes_panel$t<=taxes_panel$holiday_end] <- NA

# missed payment variable
taxes_panel$missed_payment <- as.numeric(taxes_panel$en_fecha==0)

# compliance variable
taxes_panel$compliance <- as.numeric(taxes_panel$nr_paymntsowed==0)

# drop some rows
taxes_panel <- taxes_panel[!(taxes_panel$tax=="Vehicle" & 
                               taxes_panel$t %in% c(23,24)),]


##############################################################
# GRAPHICAL ANALYSIS - NATURAL EXPERIMENT. Impact of the tax holiday lottery

#Missed current payments
#Compliance
#Cumulative missed payments

## By type of tax

#prep data
plot_data <- taxes_panel %>% filter(ES_BP==1) %>%
  group_by(tax, t, TREATMENT) %>% dplyr::summarize(
    N = n(),
    
    # missed current payment
    missed_payment_mean = mean(missed_payment, na.rm=T),
    se_missed_payment_mean = sd(missed_payment, na.rm=T)/sqrt(N), 
    missed_payment_upper= missed_payment_mean + qnorm(.975)*(se_missed_payment_mean),
    missed_payment_lower= missed_payment_mean - qnorm(.975)*(se_missed_payment_mean),
    
    compliance_mean = mean(compliance, na.rm=T),
    se_compliance_mean = sd(compliance, na.rm=T)/sqrt(N),
    compliance_upper= compliance_mean + qnorm(.975)*se_compliance_mean, 
    compliance_lower= compliance_mean - qnorm(.975)*se_compliance_mean,
    
    nr_missed_mean = mean(nr_paymntsowed, na.rm=T),
    se_nr_missed_mean = sd(nr_paymntsowed, na.rm=T)/sqrt(N),
    nr_missed_upper= nr_missed_mean + qnorm(.975)*(se_nr_missed_mean), 
    nr_missed_lower= nr_missed_mean - qnorm(.975)*(se_nr_missed_mean)
    
  ) 

plot_data <- plot_data %>% inner_join(holiday)

plot_data$Winner <- as.factor(plot_data$TREATMENT)

# missed payment figure ---
plot_data %>% filter(t>-15 & t<21) %>% 
  ggplot(aes(x=t, y=missed_payment_mean, color=Winner)) + 
  facet_wrap(tax ~ . , scales = "free_y") +
  geom_rect(aes(xmin=holiday_start, xmax=holiday_end, ymin=-Inf, ymax=Inf),
            fill="gray80", color="gray80", alpha=.1) +
  geom_vline(aes(xintercept = 0)) +
  geom_point(size=2.4) +
  #ylim(0, .5) +
  xlim(-15,20) + 
  xlab("bills since tax holiday") + 
  ylab("mean of missed current payment") + 
  geom_errorbar(aes(ymin=missed_payment_lower,                                                             
                    ymax=missed_payment_upper), # colour="blue",                                                                         
                width=.3, alpha=.75) + 
  scale_color_brewer(palette="Set1") +
  ggtitle("Missed Payment") +
  theme_minimal() +
  theme(plot.title = element_text(size = rel(1.2)),
        axis.text = element_text(size = rel(1)), 
        axis.title.y = element_text(size = rel(1)), 
        axis.title.x = element_text(size = rel(1)), 
        strip.text.x = element_text(size = rel(1.2)), 
        legend.position = "bottom") 


# compliance figure ---
plot_data %>% filter(t>-15 & t<21) %>% 
  ggplot(aes(x=t, y=compliance_mean, color=Winner)) + 
  facet_wrap(tax ~ . , scales = "free_y") +
  geom_rect(aes(xmin=holiday_start, xmax=holiday_end, ymin=-Inf, ymax=Inf),
            fill="gray80", color="gray80", alpha=.1) +
  geom_vline(aes(xintercept = 0)) +
  geom_point(size=2.4) +
  #ylim(0, .5) +
  xlim(-15,20) + 
  xlab("bills since tax holiday") + 
  ylab("mean of compliance") + 
  geom_errorbar(aes(ymin=compliance_lower,                                                             
                    ymax=compliance_upper), # colour="blue",                                                                         
                width=.3, alpha=.75) + 
  scale_color_brewer(palette="Set1") +
  ggtitle("Compliance") +
  theme_minimal() +
  theme(plot.title = element_text(size = rel(1.2)),
        axis.text = element_text(size = rel(1)), 
        axis.title.y = element_text(size = rel(1)), 
        axis.title.x = element_text(size = rel(1)), 
        strip.text.x = element_text(size = rel(1.2)), 
        legend.position = "bottom") 

# nr missed payments figure ---
plot_data %>% filter(t>-15 & t<21) %>% 
  ggplot(aes(x=t, y=nr_missed_mean, color=Winner)) + 
  facet_wrap(tax ~ . , scales = "free_y") +
  geom_rect(aes(xmin=holiday_start, xmax=holiday_end, ymin=-Inf, ymax=Inf),
            fill="gray80", color="gray80", alpha=.1) +
  geom_vline(aes(xintercept = 0)) +
  geom_point(size=2.4) +
  ylim(-.2, 2.75) +
  xlim(-15,20) + 
  xlab("bills since tax holiday") + 
  ylab("mean nr of payments owed") + 
  geom_errorbar(aes(ymin=nr_missed_lower,                                                             
                    ymax=nr_missed_upper), # colour="blue",                                                                         
                width=.3, alpha=.75) + 
  scale_color_brewer(palette="Set1") +
  ggtitle("Cumulative Missed Payments") +
  theme_minimal() +
  theme(plot.title = element_text(size = rel(1.2)),
        axis.text = element_text(size = rel(1)), 
        axis.title.y = element_text(size = rel(1)), 
        axis.title.x = element_text(size = rel(1)), 
        strip.text.x = element_text(size = rel(1.2)), 
        legend.position = "bottom") 



## All taxes together 
#Prep data

#for this we first need to standardize the timeline 
taxes_panel$t_st <- taxes_panel$t

# drop data within holiday window for all eligible taxpayers
taxes_panel <- taxes_panel %>% filter(!(ES_BP==1 & t>0 & t<=holiday_end))
# for eligibles, fix full timeframe so that first payment period after holiday is always 3
taxes_panel$t_st[taxes_panel$ES_BP==1 & taxes_panel$t > 0] <- 
  taxes_panel$t - taxes_panel$holiday_end + 3

# drop data within holiday window (but outside the standard 3 payment period holiday) 
# for all ineligible taxpayers
taxes_panel <- taxes_panel %>% filter(!(ES_BP==0 & t>=4 & t<= holiday_end))

holiday

taxes_panel$st <- taxes_panel$holiday_end - 3

taxes_panel$t_st <- ifelse((taxes_panel$ES_BP==1 & taxes_panel$t > 0) | 
                             (taxes_panel$ES_BP==0 & taxes_panel$t > 4), 
                           taxes_panel$t - taxes_panel$st, taxes_panel$t)


plot_data <- taxes_panel %>% filter(ES_BP==1) %>%
  group_by(t_st, TREATMENT) %>% dplyr::summarize(
    N = n(),
    
    # missed current payment
    missed_payment_mean = mean(missed_payment, na.rm=T),
    se_missed_payment_mean = sd(missed_payment, na.rm=T)/sqrt(N), 
    missed_payment_upper= missed_payment_mean + qnorm(.975)*(se_missed_payment_mean),
    missed_payment_lower= missed_payment_mean - qnorm(.975)*(se_missed_payment_mean),
    
    compliance_mean = mean(compliance, na.rm=T),
    se_compliance_mean = sd(compliance, na.rm=T)/sqrt(N),
    compliance_upper= compliance_mean + qnorm(.975)*se_compliance_mean, 
    compliance_lower= compliance_mean - qnorm(.975)*se_compliance_mean,
    
    nr_missed_mean = mean(nr_paymntsowed, na.rm=T),
    se_nr_missed_mean = sd(nr_paymntsowed, na.rm=T)/sqrt(N),
    nr_missed_upper= nr_missed_mean + qnorm(.975)*(se_nr_missed_mean), 
    nr_missed_lower= nr_missed_mean - qnorm(.975)*(se_nr_missed_mean)
    
  ) 

plot_data$Winner <- as.factor(plot_data$TREATMENT)

# missed payment figure ---
ggplot(plot_data, aes(x=t_st, y=missed_payment_mean, color=Winner)) + 
  geom_vline(aes(xintercept = 0)) +
  geom_point(size=2.4) +
  ylim(0, .18) +
  xlim(-15,20) + 
  xlab("bills since tax holiday") + 
  ylab("mean of missed current payment") + 
  geom_errorbar(aes(ymin=missed_payment_lower,                                                             
                    ymax=missed_payment_upper), # colour="blue",                                                                         
                width=.3, alpha=.75) + 
  scale_color_brewer(palette="Set1") +
  ggtitle("Missed Payment - Pooled Taxes") +
  theme_minimal() +
  theme(plot.title = element_text(size = rel(1.2)),
        axis.text = element_text(size = rel(1)), 
        axis.title.y = element_text(size = rel(1)), 
        axis.title.x = element_text(size = rel(1)), 
        strip.text.x = element_text(size = rel(1.2)), 
        legend.position = "bottom") 


# compliance figure ---
ggplot(plot_data, aes(x=t_st, y=compliance_mean, color=Winner)) + 
  geom_vline(aes(xintercept = 0)) +
  geom_point(size=2.4) +
  ylim(0.6, 1) +
  xlim(-15,20) + 
  xlab("bills since tax holiday") + 
  ylab("mean of compliance") + 
  geom_errorbar(aes(ymin=compliance_lower,                                                             
                    ymax=compliance_upper), # colour="blue",                                                                         
                width=.3, alpha=.75) + 
  scale_color_brewer(palette="Set1") +
  ggtitle("Compliance - Pooled Taxes") +
  theme_minimal() +
  theme(plot.title = element_text(size = rel(1.2)),
        axis.text = element_text(size = rel(1)), 
        axis.title.y = element_text(size = rel(1)), 
        axis.title.x = element_text(size = rel(1)), 
        strip.text.x = element_text(size = rel(1.2)), 
        legend.position = "bottom") 

# nr missed payments figure ---
ggplot(plot_data, aes(x=t_st, y=nr_missed_mean, color=Winner)) + 
  geom_vline(aes(xintercept = 0)) +
  geom_point(size=2.4) +
  ylim(0, 1) +
  xlim(-15,20) + 
  xlab("bills since tax holiday") + 
  ylab("mean nr of missed payments") + 
  geom_errorbar(aes(ymin=nr_missed_lower,                                                             
                    ymax=nr_missed_upper), # colour="blue",                                                                         
                width=.3, alpha=.75) + 
  scale_color_brewer(palette="Set1") +
  ggtitle("Cumulative Missed Payments - Pooled Taxes") +
  theme_minimal() +
  theme(plot.title = element_text(size = rel(1.2)),
        axis.text = element_text(size = rel(1)), 
        axis.title.y = element_text(size = rel(1)), 
        axis.title.x = element_text(size = rel(1)), 
        strip.text.x = element_text(size = rel(1.2)), 
        legend.position = "bottom") 


##############################################################
## TABLE 1 - NATURAL EXPERIMENT. Effects of the tax holiday 
## (difference in differences analysis).
##############################################################

# rescaling the time variable to account for the taxes that have twice as 
# many payments per year
taxes_panel$t_st_2 <- ifelse(taxes_panel$tax=="Sewage" | taxes_panel$tax=="Head", 
                             taxes_panel$t_st/2, taxes_panel$t_st)


# 1 year diff in diff setup
dd_data1 <-  rbind.data.frame(taxes_panel %>% filter(ES_BP==1) %>%
                                group_by(CUENTA, tax, TREATMENT) %>% dplyr::summarise(
                                  compliance_mean_DiD_1yr =
                                    mean(compliance[t_st>3 & t_st<=6], na.rm=T) - 
                                    mean(compliance[t_st<0 & t_st>=(-3)], na.rm=T),
                                  missed_payment_mean_DiD_1yr =
                                    mean(missed_payment[t_st>3 & t_st<=6], na.rm=T)-
                                    mean(missed_payment[t_st<0 & t_st>=(-3)], na.rm=T),
                                  nr_missed_payments_mean_DiD_1yr =
                                    mean(nr_paymntsowed[t_st> 3 & t_st<=6], na.rm=T)-
                                    mean(nr_paymntsowed[t_st<0 & t_st>=(-3)], na.rm=T),
                                  compliance_mean_DiD_1yr.yr2 =
                                    mean(compliance[t_st>6 & t_st<=9], na.rm=T) - 
                                    mean(compliance[t_st<0 & t_st>=(-3)], na.rm=T),
                                  missed_payment_mean_DiD_1yr.yr2 =
                                    mean(missed_payment[t_st>6 & t_st<=9], na.rm=T)-
                                    mean(missed_payment[t_st<0 & t_st>=(-3)], na.rm=T),
                                  nr_missed_payments_mean_DiD_1yr.yr2 =
                                    mean(nr_paymntsowed[t_st> 6 & t_st<=9], na.rm=T)-
                                    mean(nr_paymntsowed[t_st<0 & t_st>=(-3)], na.rm=T),
                                  compliance_mean_DiD_1yr.yr3 =
                                    mean(compliance[t_st>9 & t_st<=12], na.rm=T) - 
                                    mean(compliance[t_st<0 & t_st>=(-3)], na.rm=T),
                                  missed_payment_mean_DiD_1yr.yr3 =
                                    mean(missed_payment[t_st>9 & t_st<=12], na.rm=T)-
                                    mean(missed_payment[t_st<0 & t_st>=(-3)], na.rm=T),
                                  nr_missed_payments_mean_DiD_1yr.yr3 =
                                    mean(nr_paymntsowed[t_st>9 & t_st<=12], na.rm=T)-
                                    mean(nr_paymntsowed[t_st<0 & t_st>=(-3)], na.rm=T)
                                )
)


# 1 year diff in diff all taxes: 
# compliance, missed payments & number of missed payments

difference_in_means(compliance_mean_DiD_1yr ~ TREATMENT,
                    data = dd_data1)
difference_in_means(missed_payment_mean_DiD_1yr ~ TREATMENT, 
                    data = dd_data1)
difference_in_means(nr_missed_payments_mean_DiD_1yr ~ TREATMENT,
                    data = dd_data1)


# 1 year diff in diff by type of tax 
# (compliance, missed payments & number of missed payments)

# Property
difference_in_means(compliance_mean_DiD_1yr ~ TREATMENT, 
                    data = dd_data1[dd_data1$tax=="Property",])
difference_in_means(missed_payment_mean_DiD_1yr ~ TREATMENT, 
                    data = dd_data1[dd_data1$tax=="Property",])
difference_in_means(nr_missed_payments_mean_DiD_1yr ~ TREATMENT, 
                    data = dd_data1[dd_data1$tax=="Property",])

# Sewage
difference_in_means(compliance_mean_DiD_1yr ~ TREATMENT,
                  data = dd_data1[dd_data1$tax=="Sewage",])
difference_in_means(missed_payment_mean_DiD_1yr ~ TREATMENT, 
                    data = dd_data1[dd_data1$tax=="Sewage",])
difference_in_means(nr_missed_payments_mean_DiD_1yr ~ TREATMENT, 
                    data = dd_data1[dd_data1$tax=="Sewage",])

# Head
difference_in_means(compliance_mean_DiD_1yr ~ TREATMENT, 
                    data = dd_data1[dd_data1$tax=="Head",])
difference_in_means(missed_payment_mean_DiD_1yr ~ TREATMENT,
                  data = dd_data1[dd_data1$tax=="Head",])
difference_in_means(nr_missed_payments_mean_DiD_1yr ~ TREATMENT,
                  data = dd_data1[dd_data1$tax=="Head",])

# Vehicle
difference_in_means(compliance_mean_DiD_1yr ~ TREATMENT, 
                    data = dd_data1[dd_data1$tax=="Vehicle",])
difference_in_means(missed_payment_mean_DiD_1yr ~ TREATMENT, 
                    data = dd_data1[dd_data1$tax=="Vehicle",])
difference_in_means(nr_missed_payments_mean_DiD_1yr ~ TREATMENT, 
                    data = dd_data1[dd_data1$tax=="Vehicle",])



# 3 year diff in diff setup
dd_data <-  rbind.data.frame(taxes_panel %>% filter(ES_BP==1) %>%
                               group_by(CUENTA, tax, TREATMENT) %>% dplyr::summarise(
                                 
                                 compliance_mean_DiD_3yr =
                                   mean(compliance[t_st>3 & t_st<=12], na.rm=T)-
                                   mean(compliance[t_st<0 & t_st>=(-9)], na.rm=T),
                                 missed_payment_mean_DiD_3yr =
                                   mean(missed_payment[t_st>3 & t_st<=12], na.rm=T)-
                                   mean(missed_payment[t_st<0 & t_st>=(-9)], na.rm=T),
                                 nr_missed_payments_mean_DiD_3yr =
                                   mean(nr_paymntsowed[t_st> 3 & t_st<=12], na.rm=T)-
                                   mean(nr_paymntsowed[t_st<0 & t_st>=(-9)], na.rm=T)
                               )
)


# 3 year diff in diff all taxes:
# compliance, missed payments & number of missed payments
difference_in_means(compliance_mean_DiD_3yr ~ TREATMENT, 
                    data = dd_data)
difference_in_means(missed_payment_mean_DiD_3yr ~ TREATMENT, 
                    data = dd_data)
difference_in_means(nr_missed_payments_mean_DiD_3yr ~ TREATMENT, 
                    data = dd_data)

# 3 year diff in diff by type of tax
# (compliance, missed payments & number of missed payments)

# Property
difference_in_means(compliance_mean_DiD_3yr ~ TREATMENT, 
                    data = dd_data[dd_data$tax=="Property",])
difference_in_means(missed_payment_mean_DiD_3yr ~ TREATMENT, 
                    data = dd_data[dd_data$tax=="Property",])
difference_in_means(nr_missed_payments_mean_DiD_3yr ~ TREATMENT, 
                    data = dd_data[dd_data$tax=="Property",])

# Sewage
difference_in_means(compliance_mean_DiD_3yr ~ TREATMENT, 
                    data = dd_data[dd_data$tax=="Sewage",])
difference_in_means(missed_payment_mean_DiD_3yr ~ TREATMENT, 
                    data = dd_data[dd_data$tax=="Sewage",])
difference_in_means(nr_missed_payments_mean_DiD_3yr ~ TREATMENT, 
                    data = dd_data[dd_data$tax=="Sewage",])

# Head
difference_in_means(compliance_mean_DiD_3yr ~ TREATMENT, 
                    data = dd_data[dd_data$tax=="Head",])
difference_in_means(missed_payment_mean_DiD_3yr ~ TREATMENT, 
                    data = dd_data[dd_data$tax=="Head",])
difference_in_means(nr_missed_payments_mean_DiD_3yr ~ TREATMENT, 
                    data = dd_data[dd_data$tax=="Head",])

# Vehicle
difference_in_means(compliance_mean_DiD_3yr ~ TREATMENT, 
                    data = dd_data[dd_data$tax=="Vehicle",])
difference_in_means(missed_payment_mean_DiD_3yr ~ TREATMENT, 
                    data = dd_data[dd_data$tax=="Vehicle",])
difference_in_means(nr_missed_payments_mean_DiD_3yr ~ TREATMENT, 
                    data = dd_data[dd_data$tax=="Vehicle",])


##############################################################
## TABLE 2 - NATURAL EXPERIMENT. Effect of the tax holiday (T-test)
## Comparing winners to non-winners, difference of means test 
# for the total debt as of October, 2014. 
##############################################################

difference_in_means(debt_amount ~ won_lottery, 
                    data = naturalex_debt_gtp)


##############################################################
## TABLE 3 - NATURAL EXPERIMENT. Effects of the tax holiday 
## (difference in differences analysis), 5 year window
##############################################################


# 5 year diff in diff setup
dd_data <-  rbind.data.frame(taxes_panel %>% filter(ES_BP==1) %>%
                               group_by(CUENTA, tax, TREATMENT) %>% dplyr::summarise(
                                 compliance_mean_DiD_5yr =
                                   mean(compliance[t_st>3 & t_st<=18], na.rm=T)-
                                   mean(compliance[t_st<0 & t_st>=(-15)], na.rm=T),
                                 missed_payment_mean_DiD_5yr =
                                   mean(missed_payment[t_st>3 & t_st<=18], na.rm=T)-
                                   mean(missed_payment[t_st<0 & t_st>=(-15)], na.rm=T),
                                 nr_missed_payments_mean_DiD_5yr =
                                   mean(nr_paymntsowed[t_st> 3 & t_st<=18], na.rm=T)-
                                   mean(nr_paymntsowed[t_st<0 & t_st>=(-15)], na.rm=T)
                               )
)


# 5 year diff in diff all taxes:
# compliance, missed payments & number of missed payments
difference_in_means(compliance_mean_DiD_5yr ~ TREATMENT, 
                    data = dd_data)
difference_in_means(missed_payment_mean_DiD_5yr ~ TREATMENT, 
                    data = dd_data)
difference_in_means(nr_missed_payments_mean_DiD_5yr ~ TREATMENT, 
                    data = dd_data)

# 5 year diff in diff by type of tax
# (compliance, missed payments & number of missed payments)

# Property
difference_in_means(compliance_mean_DiD_5yr ~ TREATMENT, 
                    data = dd_data[dd_data$tax=="Property",])
difference_in_means(missed_payment_mean_DiD_5yr ~ TREATMENT, 
                    data = dd_data[dd_data$tax=="Property",])
difference_in_means(nr_missed_payments_mean_DiD_5yr ~ TREATMENT, 
                    data = dd_data[dd_data$tax=="Property",])

# Sewage
difference_in_means(compliance_mean_DiD_5yr ~ TREATMENT, 
                    data = dd_data[dd_data$tax=="Sewage",])
difference_in_means(missed_payment_mean_DiD_5yr ~ TREATMENT, 
                    data = dd_data[dd_data$tax=="Sewage",])
difference_in_means(nr_missed_payments_mean_DiD_5yr ~ TREATMENT, 
                    data = dd_data[dd_data$tax=="Sewage",])

# Head
difference_in_means(compliance_mean_DiD_5yr ~ TREATMENT, 
                    data = dd_data[dd_data$tax=="Head",])
difference_in_means(missed_payment_mean_DiD_5yr ~ TREATMENT, 
                    data = dd_data[dd_data$tax=="Head",])
difference_in_means(nr_missed_payments_mean_DiD_5yr ~ TREATMENT, 
                    data = dd_data[dd_data$tax=="Head",])

# Vehicle
difference_in_means(compliance_mean_DiD_5yr ~ TREATMENT, 
                    data = dd_data[dd_data$tax=="Vehicle",])
difference_in_means(missed_payment_mean_DiD_5yr ~ TREATMENT, 
                    data = dd_data[dd_data$tax=="Vehicle",])
difference_in_means(nr_missed_payments_mean_DiD_5yr ~ TREATMENT, 
                    data = dd_data[dd_data$tax=="Vehicle",])


##############################################################
## TABLE 4 - FIELD EXPERIMENT. Informational mechanism. 
# Good and bad taxpayers, comparison of treatments 1, 2 and
# 4 (pooled) vs. the placebo control group (treatment 0). 
# First differences use the value of the dependent variable 
# for the pre-treatment period (March 2014).
##############################################################

### Creating first differences outcomes for the field experiment
fieldex$missed_payment_DiD <- fieldex$JUL_2014_ontime - fieldex$MAR_2014_ontime # missed payment
fieldex$web_bill_DiD <- fieldex$july_web_access - fieldex$march_web_access # web access 
fieldex$payments_owed_DiD <- fieldex$july_nrbills_owed - fieldex$adeudadas_2014_MAR # nr bills owed

# compliance
fieldex$compliance_march <- ifelse(fieldex$march_ontime==1 & fieldex$adeudadas_2014_MAR==0, 1, 0)
fieldex$compliance_july <- ifelse(fieldex$JUL_2014_ontime==1 & fieldex$july_nrbills_owed==0, 1, 0)
fieldex$compliance_DiD <- fieldex$compliance_july - fieldex$compliance_march

# results 
difference_in_means(missed_payment_DiD ~ pooled_124_0, data = fieldex)
difference_in_means(payments_owed_DiD ~ pooled_124_0, data = fieldex)
difference_in_means(compliance_DiD ~ pooled_124_0, data = fieldex)
difference_in_means(web_bill_DiD ~ pooled_124_0, data = fieldex)


##############################################################
## TABLE 6 - FIELD EXPERIMENT: Comparison of effects for good 
# and bad taxpayers: difference of the difference of means for 
# the comparison of treatments 1, 2 and 4 (pooled) vs. the 
# placebo control group (treatment 0).
##############################################################

# Function to test the difference of the differences
comp.eff <- function(dm1, dm2){ 
  
  print("Difference in Means 1")
  print(dm1)
  
  print("Difference in Means 2")
  print(dm2)
  
  print("#####. Difference in Effects")
  
  diff <- dm1$coefficients - dm2$coefficients
  se.diff <- sqrt((dm1$std.error^2)+(dm2$std.error^2))
  t.val.diff <- diff/se.diff
  df <- dm1$nobs + dm2$nobs -2
  # Calculate the p-value
  p_val <- 2 * (1 - pt(abs(t.val.diff), df=df))
  
  res <- c(diff,se.diff,t.val.diff, p_val)
  names(res) <- c("Diff in effects", "SE", "t", "p-value")
  print(res)
  return(res) 
}

message("difference of the differences")
comp.eff(difference_in_means(missed_payment_DiD ~ pooled_124_0, 
                             data = filter(fieldex, type == "good taxpayer")),
         difference_in_means(missed_payment_DiD ~ pooled_124_0, 
                             data = filter(fieldex, type == "bad taxpayer")))

comp.eff(difference_in_means(web_bill_DiD ~ pooled_124_0, 
                             data = filter(fieldex, type == "good taxpayer")),
         difference_in_means(web_bill_DiD ~ pooled_124_0, 
                             data = filter(fieldex, type == "bad taxpayer")))


##############################################################
## TABLE 7-  NATURAL EXPERIMENT. Income effects. Comparison 
# of winners vs. non-winners: heterogeneous effects of winning 
# the lottery by tax bracket.
##############################################################

# Coding tax brackets
a <- 418958
b <- 1047393
c <- 2094784
d <- 41895699

taxes_panel$tax_bracket <- "D"
taxes_panel$tax_bracket[taxes_panel$VALOR_CATASTRALACTUAL<d] <- "C"
taxes_panel$tax_bracket[taxes_panel$VALOR_CATASTRALACTUAL<c] <- "B"
taxes_panel$tax_bracket[taxes_panel$VALOR_CATASTRALACTUAL<b] <- "A"
taxes_panel$tax_bracket[is.na(taxes_panel$VALOR_CATASTRALACTUAL)] <- NA

by_tax_bracket <- taxes_panel %>% dplyr::filter(!is.na(tax_bracket) & t_st==4) %>% 
  group_by(tax_bracket) %>%
  dplyr::summarize(
    N_treat = sum(TREATMENT==1),
    N_control = sum(TREATMENT==0),
    missed_treat = mean(missed_payment[TREATMENT==1], na.rm=T),
    missed_control = mean(missed_payment[TREATMENT==0], na.rm=T)
  )

by_tax_bracket
chisq.test(by_tax_bracket[,c(4,5)])


# HTEs by Valor Catastral

# 1 year diff in diff setup
dd_data_vc <-  rbind.data.frame(taxes_panel %>% filter(ES_BP==1 & tax=="Property") %>%
                                  group_by(CUENTA, TREATMENT, VALOR_CAT2004) %>% dplyr::summarise(
                                    compliance_mean_DiD_1yr =
                                      mean(compliance[t_st>3 & t_st<=6], na.rm=T) - 
                                      mean(compliance[t_st<0 & t_st>=(-3)], na.rm=T),
                                    missed_payment_mean_DiD_1yr =
                                      mean(missed_payment[t_st>3 & t_st<=6], na.rm=T)-
                                      mean(missed_payment[t_st<0 & t_st>=(-3)], na.rm=T),
                                    nr_missed_payments_mean_DiD_1yr =
                                      mean(nr_paymntsowed[t_st> 3 & t_st<=6], na.rm=T)-
                                      mean(nr_paymntsowed[t_st<0 & t_st>=(-3)], na.rm=T)
                                  )
)

dd_data_vc$high_propvalue <- ifelse(dd_data_vc$VALOR_CAT2004 > 
                                      median(dd_data_vc$VALOR_CAT2004, na.rm=T),
                                    1, 0)

income_missed <- comp.eff(difference_in_means(missed_payment_mean_DiD_1yr ~ TREATMENT, 
                                              data = filter(dd_data_vc, high_propvalue == 1)),
                          difference_in_means(missed_payment_mean_DiD_1yr ~ TREATMENT, 
                                              data = filter(dd_data_vc, high_propvalue == 0)))
income_missed

income_nrmissed <- comp.eff(difference_in_means(nr_missed_payments_mean_DiD_1yr ~ TREATMENT, 
                                                data = filter(dd_data_vc, high_propvalue == 1)),
                            difference_in_means(nr_missed_payments_mean_DiD_1yr ~ TREATMENT, 
                                                data = filter(dd_data_vc, high_propvalue == 0)))
income_nrmissed

income_compliance <- comp.eff(difference_in_means(compliance_mean_DiD_1yr ~ TREATMENT, 
                                                  data = filter(dd_data_vc, high_propvalue == 1)),
                              difference_in_means(compliance_mean_DiD_1yr ~ TREATMENT, 
                                                  data = filter(dd_data_vc, high_propvalue == 0)))

income_compliance

##############################################################
## TABLE 8. NATURAL EXPERIMENT. Habit effects. Winners vs. 
# non-winners: heterogeneous treatment effects by time since 
# winning (heterogeneous effects; 1, 2 and 3 years).
##############################################################

# Yr 1 vs 2
comp.eff(difference_in_means(missed_payment_mean_DiD_1yr ~ TREATMENT,
                             data = dd_data1),
         difference_in_means(missed_payment_mean_DiD_1yr.yr2 ~ TREATMENT,
                             data = dd_data1))

comp.eff(difference_in_means(nr_missed_payments_mean_DiD_1yr ~ TREATMENT,
                             data = dd_data1),
         difference_in_means(nr_missed_payments_mean_DiD_1yr.yr2 ~ TREATMENT,
                             data = dd_data1))

comp.eff(difference_in_means(compliance_mean_DiD_1yr ~ TREATMENT,
                             data = dd_data1),
         difference_in_means(compliance_mean_DiD_1yr.yr2 ~ TREATMENT,
                             data = dd_data1))

# Yr 2 vs 3
comp.eff(difference_in_means(missed_payment_mean_DiD_1yr.yr2 ~ TREATMENT,
                             data = dd_data1),
         difference_in_means(missed_payment_mean_DiD_1yr.yr3 ~ TREATMENT,
                             data = dd_data1))

comp.eff(difference_in_means(nr_missed_payments_mean_DiD_1yr.yr2 ~ TREATMENT,
                             data = dd_data1),
         difference_in_means(nr_missed_payments_mean_DiD_1yr.yr3 ~ TREATMENT,
                             data = dd_data1))

comp.eff(difference_in_means(compliance_mean_DiD_1yr.yr2 ~ TREATMENT,
                             data = dd_data1),
         difference_in_means(compliance_mean_DiD_1yr.yr3 ~ TREATMENT,
                             data = dd_data1))

##############################################################
## TABLE 9. FIELD EXPERIMENT. Priming knowledge of punishment. 
# Good and bad taxpayers, comparison of treatments 3 and 5 
# (pooled) vs. the placebo control group (treatment 0).
##############################################################

difference_in_means(missed_payment_DiD ~ pooled_35_0, data = fieldex)
difference_in_means(payments_owed_DiD ~ pooled_35_0, data = fieldex)
difference_in_means(web_bill_DiD ~ pooled_35_0, data = fieldex)

difference_in_means(compliance_DiD ~ pooled_35_0, data = fieldex)

##############################################################
# TABLE 10. FIELD EXPERIMENT: Comparison of effects for good 
# and bad taxpayers: difference of the difference in means for 
# the comparison of treatments 3 and 5 (priming knowledge of 
# sanctions, pooled) vs. the placebo control group (treatment 0).
##############################################################

comp.eff(difference_in_means(missed_payment_DiD ~ pooled_35_0, 
                             data = filter(fieldex, type == "good taxpayer")),
         difference_in_means(missed_payment_DiD ~ pooled_35_0, 
                             data = filter(fieldex, type == "bad taxpayer")))

comp.eff(difference_in_means(web_bill_DiD ~ pooled_35_0, 
                             data = filter(fieldex, type == "good taxpayer")),
         difference_in_means(web_bill_DiD ~ pooled_35_0, 
                             data = filter(fieldex, type == "bad taxpayer")))

##############################################################
# TABLE 11. FIELD EXPERIMENT. Positive vs negative incentives. 
# Good and bad taxpayers, comparison of treatments 1, 2 and 4 
# (positive incentives, pooled) vs 3 and 5 (negative incentives, pooled). 
# Test using compliance conditional on significant effects for missed payment,
# number of payments owed or total debt.
##############################################################

difference_in_means(missed_payment_DiD ~ pooled_124_35, data = fieldex)
difference_in_means(payments_owed_DiD ~ pooled_124_35, data = fieldex)
difference_in_means(web_bill_DiD ~ pooled_124_35, data = fieldex)

##############################################################
# TABLE 12. FIELD EXPERIMENT. Comparison of effects of positive 
# vs negative incentives for good and bad taxpayers: difference 
# of the difference in means for the comparison of treatments 
# 1, 2 and 4 (pooled) and 3 and 5 (pooled).
##############################################################

comp.eff(difference_in_means(missed_payment_DiD ~ pooled_124_35, 
                             data = filter(fieldex, type == "good taxpayer")),
         difference_in_means(missed_payment_DiD ~ pooled_124_35, 
                             data = filter(fieldex, type == "bad taxpayer")))

comp.eff(difference_in_means(payments_owed_DiD ~ pooled_124_35, 
                             data = filter(fieldex, type == "good taxpayer")),
         difference_in_means(payments_owed_DiD ~ pooled_124_35, 
                             data = filter(fieldex, type == "bad taxpayer")))

comp.eff(difference_in_means(web_bill_DiD ~ pooled_124_35, 
                             data = filter(fieldex, type == "good taxpayer")),
         difference_in_means(web_bill_DiD ~ pooled_124_35, 
                             data = filter(fieldex, type == "bad taxpayer")))


##############################################################
# TABLE 13. FIELD EXPERIMENT. Marginal taxpayers. Good taxpayers. 
# Heterogeneous effects, taxpayers at risk. Comparison of 
# treatment effect of 1, 2 and 4 (pooled) vs control 
# (A-Information about the tax lottery), on one test and 3 
# and 5 (pooled) vs control on another (B-Information about sanctions).
##############################################################

# Identifying good taxpayers with history of debt 
# Ever owed a bill since March 2009? 
names(fieldex)[grepl("adeudadas", names(fieldex))]

sum_bills_owed <- apply(fieldex[,grepl("adeudadas", names(fieldex))], 1, sum)
fieldex$goodtp_at_risk <- ifelse(sum_bills_owed>0, 1, 0) 
table(fieldex$goodtp_at_risk[fieldex$type=="good taxpayer"])

### A. Information on the lottery - Heterogeneous effects for taxpayers at risk
comp.eff(difference_in_means(missed_payment_DiD ~ pooled_124_0, 
                             data = filter(fieldex, type == "good taxpayer" & goodtp_at_risk==1)),
         difference_in_means(missed_payment_DiD ~ pooled_124_0, 
                             data = filter(fieldex, type == "good taxpayer" & goodtp_at_risk==0)))

comp.eff(difference_in_means(web_bill_DiD ~ pooled_124_0, 
                             data = filter(fieldex, type == "good taxpayer" & goodtp_at_risk==1)),
         difference_in_means(web_bill_DiD ~ pooled_124_0, 
                             data = filter(fieldex, type == "good taxpayer" & goodtp_at_risk==0)))

### B. Information on sanctions - Heterogeneous effects for taxpayers at risk
comp.eff(difference_in_means(missed_payment_DiD ~ pooled_35_0, 
                             data = filter(fieldex, type == "good taxpayer" & goodtp_at_risk==1)),
         difference_in_means(missed_payment_DiD ~ pooled_35_0, 
                             data = filter(fieldex, type == "good taxpayer" & goodtp_at_risk==0)))

comp.eff(difference_in_means(web_bill_DiD ~ pooled_35_0, 
                             data = filter(fieldex, type == "good taxpayer" & goodtp_at_risk==1)),
         difference_in_means(web_bill_DiD ~ pooled_35_0, 
                             data = filter(fieldex, type == "good taxpayer" & goodtp_at_risk==0)))


##############################################################
# TABLE 14. FIELD EXPERIMENT. Marginal taxpayers. Bad taxpayers. 
# Heterogeneous effects, salvageable taxpayers. Comparison of treatment 
# effect of 1, 2 and 4 (pooled) vs control (A-Information about the 
# tax lottery), on one test and 3 and 5 (pooled) vs control on 
# another (B-Information about sanctions). Test using compliance 
# conditional on significant effects for missed payment, number 
# of payments owed or total debt.
##############################################################

# Identifying bad taxpayers not too in debt
# Ever owed a bill since March 2009?
fieldex$salvageable_btp <- ifelse(fieldex$adeudadas_2014_MAR>3, 0, 1)

### A. Information on the lottery - Heterogeneous effects for taxpayers at risk

# Missed payments
comp.eff(difference_in_means(missed_payment_DiD ~ pooled_124_0, 
                             data = filter(fieldex, type == "bad taxpayer" & salvageable_btp==1)),
         difference_in_means(missed_payment_DiD ~ pooled_124_0, 
                             data = filter(fieldex, type == "bad taxpayer" & salvageable_btp==0)))

# Web access
comp.eff(difference_in_means(web_bill_DiD ~ pooled_124_0, 
                             data = filter(fieldex, type == "bad taxpayer" & salvageable_btp==1)),
         difference_in_means(web_bill_DiD ~ pooled_124_0, 
                             data = filter(fieldex, type == "bad taxpayer" & salvageable_btp==0)))

# Nr of payments owed
comp.eff(difference_in_means(payments_owed_DiD ~ pooled_124_0, 
                             data = filter(fieldex, type == "bad taxpayer" & salvageable_btp==1)),
         difference_in_means(payments_owed_DiD ~ pooled_124_0, 
                             data = filter(fieldex, type == "bad taxpayer" & salvageable_btp==0)))

# Compliance
comp.eff(difference_in_means(compliance_DiD ~ pooled_124_0, 
                             data = filter(fieldex, type == "bad taxpayer" & salvageable_btp==1)),
         difference_in_means(compliance_DiD ~ pooled_124_0, 
                             data = filter(fieldex, type == "bad taxpayer" & salvageable_btp==0)))

### B. Information on sanctions - Heterogeneous effects for taxpayers at risk

# Missed payments
comp.eff(difference_in_means(missed_payment_DiD ~ pooled_35_0, 
                             data = filter(fieldex, type == "bad taxpayer" & salvageable_btp==1)),
         difference_in_means(missed_payment_DiD ~ pooled_35_0, 
                             data = filter(fieldex, type == "bad taxpayer" & salvageable_btp==0)))

# Web access
comp.eff(difference_in_means(web_bill_DiD ~ pooled_35_0, 
                             data = filter(fieldex, type == "bad taxpayer" & salvageable_btp==1)),
         difference_in_means(web_bill_DiD ~ pooled_35_0, 
                             data = filter(fieldex, type == "bad taxpayer" & salvageable_btp==0)))

# Nr of payments owed
comp.eff(difference_in_means(payments_owed_DiD ~ pooled_35_0, 
                             data = filter(fieldex, type == "bad taxpayer" & salvageable_btp==1)),
         difference_in_means(payments_owed_DiD ~ pooled_35_0, 
                             data = filter(fieldex, type == "bad taxpayer" & salvageable_btp==0)))

# Compliance
comp.eff(difference_in_means(compliance_DiD ~ pooled_35_0, 
                             data = filter(fieldex, type == "bad taxpayer" & salvageable_btp==1)),
         difference_in_means(compliance_DiD ~ pooled_35_0, 
                             data = filter(fieldex, type == "bad taxpayer" & salvageable_btp==0)))


##############################################################
# TABLE 15. FIELD EXPERIMENT. Good and bad taxpayers. Social 
# vs individual rewards. Comparison of treatments 1, 2 (pooled) vs 4. 
# Test using compliance conditional on significant effects for 
# missed payment, number of payments owed or total debt.
##############################################################

# Missed payments
difference_in_means(missed_payment_DiD ~ pooled_12_4, data = fieldex)

# Web access
difference_in_means(web_bill_DiD ~ pooled_12_4, data = fieldex)

# Nr of payments owed
difference_in_means(payments_owed_DiD ~ pooled_12_4, data = fieldex)

# Compliance
difference_in_means(compliance_DiD ~ pooled_12_4, data = fieldex)

##############################################################
# TABLE 16. FIELD EXPERIMENT. Social (4) vs individual rewards 
# (1 and 2, pooled), comparison of effect between good and bad taxpayers.
##############################################################

# Missed payments
comp.eff(difference_in_means(missed_payment_DiD ~ pooled_12_4, 
                             data = filter(fieldex, type == "bad taxpayer")),
         difference_in_means(missed_payment_DiD ~ pooled_12_4, 
                             data = filter(fieldex, type == "good taxpayer")))

# Web access
comp.eff(difference_in_means(web_bill_DiD ~ pooled_12_4, 
                             data = filter(fieldex, type == "bad taxpayer")),
         difference_in_means(web_bill_DiD ~ pooled_12_4, 
                             data = filter(fieldex, type == "good taxpayer")))


##############################################################
# TABLE 17. FIELD EXPERIMENT. Good and bad taxpayers. Social vs 
# individual sanctions. Comparison of treatments 3 vs 5. Test 
# using compliance conditional on significant effects for missed 
# payment, number of payments owed or total debt.
##############################################################

# Missed payments
difference_in_means(missed_payment_DiD ~ pooled_3_5, data = fieldex)

# Web access
difference_in_means(web_bill_DiD ~ pooled_3_5, data = fieldex)

# Nr of payments owed
difference_in_means(payments_owed_DiD ~ pooled_3_5, data = fieldex)

# Compliance
difference_in_means(compliance_DiD ~ pooled_3_5, data = fieldex)



##############################################################
# TABLE 18. FIELD EXPERIMENT. Social vs individual sanctions. 
# Comparison of effects for good and bad taxpayers. 
# Comparison of treatments 3 vs 5. Test using compliance conditional 
# on significant effects for missed payment, number of payments owed or total debt.
##############################################################

# Missed payments
comp.eff(difference_in_means(missed_payment_DiD ~ pooled_3_5, 
                             data = filter(fieldex, type == "bad taxpayer")),
         difference_in_means(missed_payment_DiD ~ pooled_3_5, 
                             data = filter(fieldex, type == "good taxpayer")))

# Web access
comp.eff(difference_in_means(web_bill_DiD ~ pooled_3_5, 
                             data = filter(fieldex, type == "bad taxpayer")),
         difference_in_means(web_bill_DiD ~ pooled_3_5, 
                             data = filter(fieldex, type == "good taxpayer")))


################################################################################
# PAP 2 - SURVEY EXPERIMENT

message("load data")

load("survey_data.Rda")
##############################################################

##############################################################
# Social vs Individual Benefit
##############################################################

# Pooling the individual benefits treatments (questionaire versions 2 and 3) 
# and recoding as treatment=1.
survey_data$social_individual <- ifelse((survey_data$treatment==2|
                                           survey_data$treatment==3),1,NA) 
# Recoding the social benefits treatment as 0.
survey_data$social_individual <- ifelse((survey_data$treatment==1),0,
                                        survey_data$social_individual)


# Outcome: "Policies that reward good taxpayers are a waste of money" 
# totally disagree (0) - totally agree (10)
ben1 <- difference_in_means(S1p1 ~ social_individual, data = survey_data)

# Outcome: "It is worth it to be up to date with ones taxes" 
# totally disagree (0) - totally agree (10)
ben2 <- difference_in_means(S1p3 ~ social_individual, data = survey_data)

social_individual <- rbind.data.frame(ben1, ben2) 
rownames(social_individual) <- c("Rewards are waste of money", 
                                 "Worth it to be up to date")

# Adding p-value adjustments
social_individual <- social_individual[order(social_individual[,6], decreasing=F),] 
# Ordering p-values in decreasing order
ordered.ps <- social_individual[, 6]
# Building reference vector to compare to ordered p-values
FDR_reference <- .05*(1:length(ordered.ps)/length(ordered.ps))
# Comparing p-values to referece vector
FDR <- as.data.frame(cbind(ordered.ps, FDR_reference, ordered.ps<=FDR_reference))
FDR[,3] <- as.numeric(FDR[,3])
if (sum(FDR[,3])>0){
  fdr <- which(FDR[,1]==max(FDR[,1][FDR[,3]==1]))
  FDR[,4] <- c(rep("reject null", fdr), rep("do not reject", nrow(FDR)-fdr))}

if (sum(FDR[,3])==0){
  FDR[,4] <- rep("do not reject", nrow(FDR))}

Bonferroni_reference <- rep(.05/nrow(FDR), nrow(FDR)) 
Bonferroni_reject <- ifelse(ordered.ps<=Bonferroni_reference,
                            "reject null", "do not reject")

social_individual <- cbind(social_individual, FDR[,c(2,4)],
                           Bonferroni_reference,
                           Bonferroni_reject) 

names(social_individual)[17] <- "FDR_reject"
social_individual

##############################################################
# Discretionary vs lottery allocation of benefits
##############################################################

# Generating discretionary benefits dummy where surveys with the discretionary version 
# of the survey are 1.
survey_data$treat_discretion <- ifelse((survey_data$treatment==4),1,0)

# Outcome: "In Montevideo, rewards for good taxpayers go to the same people as always" 
# totally disagree (0) - totally agree (10)
discretion1 <- difference_in_means(S1p4 ~ treat_discretion, 
                                   data = survey_data)
discretion1

# Outcome: "Policies that reward good taxpayers are a waste of money" 
# totally disagree (0) - totally agree (10)
discretion2 <- difference_in_means(S1p1 ~ treat_discretion, 
                                   data = survey_data)
discretion2

# Outcome: "It is worth it to be up to date with ones taxes" 
# totally disagree (0) - totally agree (10)
discretion3 <- difference_in_means(S1p3 ~ treat_discretion, 
                                   data = survey_data)
discretion3

# Outcome: "In general, the municipal government does a good job" 
# totally disagree (0) - totally agree (10)
discretion4 <- difference_in_means(S1p2 ~ treat_discretion, 
                                   data = survey_data)
discretion4

# Outcome: "How would you classify the taxes that the municipal 
# government charges in gene 
# very just (1) - not just at all (4)
discretion5 <- difference_in_means(S1p5 ~ treat_discretion, 
                                   data = survey_data)
discretion5

discretion <- rbind.data.frame(discretion1, discretion2, discretion3, discretion4, discretion5)
rownames(discretion) <- c("Rewards go to the same people as always",
                          "Rewards are waste of money",
                          "Worth it to be up to date",
                          "Mun.gov. does a good job",
                          "Mun. taxes are just")


# Adding p-value adjustments
discretion <- discretion[order(discretion[,6], decreasing=F),] 
# Ordering p-values in decreasing order
ordered.ps <- discretion[,6]
# Building reference vector to compare to ordered p-values
FDR_reference <- .05*(1:length(ordered.ps)/length(ordered.ps))
# Comparing p-values to referece vector
FDR <- as.data.frame(cbind(ordered.ps, FDR_reference, ordered.ps<=FDR_reference))
FDR[,3] <- as.numeric(FDR[,3])
if (sum(FDR[,3])>0){
  fdr <- which(FDR[,1]==max(FDR[,1][FDR[,3]==1]))
  FDR[,4] <- c(rep("reject null", fdr), rep("do not reject", nrow(FDR)-fdr))}

if (sum(FDR[,3])==0){
  FDR[,4] <- rep("do not reject", nrow(FDR))}

Bonferroni_reference <- rep(.05/nrow(FDR), nrow(FDR)) 
Bonferroni_reject <- ifelse(ordered.ps<=Bonferroni_reference,"reject null", "do not reject")

discretion <- cbind(discretion, FDR[,c(2,4)],
                    Bonferroni_reference,
                    Bonferroni_reject) 

names(discretion)[17] <- "FDR_reject"
discretion


##############################################################
# Fines and charges vs. benefits of tax holidays
##############################################################

# Creating dataframe with relevant treatments and outcomes.
# Keeping outcomes we want for benefits
ben <- survey_data[(survey_data$treatment!=4),names(survey_data) %in%
                     c("treatment", "S1p2", "S1p3","S1p5" )] 
names(ben)

# Benefits pooled (S1p2) (A, B and C)
ben$benefits_punishments <- 1

# versus fines and charges pooled (M1p3)  (A, B , C and D)
fin <- survey_data[,names(survey_data) %in% c("treatment", "M1p2", "M1p3","M1p6")] 
names(fin)

# Pooling punishments
fin$benefits_punishments <- 0

# For three questions the outcomes are the same in the punishments and benefits conditions 
# but have different survey question numbers. Here we rename the variables so
# that we can bind the datasets into one.
names(ben)

names(ben)[1:3] <- c("M1p3", "M1p2", "M1p6") 
pooled <- rbind(ben,fin)


# Outcome: "In general, the municipal government does a good job" 
# totally disagree (0) - totally agree (10)
benefits_punishments1 <- difference_in_means(M1p3 ~ benefits_punishments, data = pooled)
benefits_punishments1

# Outcome: "In Montevideo, it is worth it to be up to date on ones taxes" 
# totally disagree (0) - totally agree (10)
benefits_punishments2 <- difference_in_means(M1p2 ~ benefits_punishments, data = pooled)
benefits_punishments2

# Outcome: "How would you classify the taxes that the municipal government charges?" 
# very just (1) - not just at all (4)
benefits_punishments3 <- difference_in_means(M1p6 ~ benefits_punishments, data = pooled)
benefits_punishments3

benefits_punishments <- rbind.data.frame(benefits_punishments1,
                                         benefits_punishments2, 
                                         benefits_punishments3)
rownames(benefits_punishments) <- c("Mun. gov. does a good job", 
                                    "Worth it to be up to date",
                                    "Mun. taxes are just")

# Adding p-value adjustments
benefits_punishments <- benefits_punishments[order(benefits_punishments[,6], decreasing=F),] 
# Ordering p-values in decreasing order
ordered.ps <- benefits_punishments[,6]
# Building reference vector to compare to ordered p-values
FDR_reference <- .05*(1:length(ordered.ps)/length(ordered.ps))
# Comparing p-values to referece vector
FDR <- as.data.frame(cbind(ordered.ps, FDR_reference, ordered.ps<=FDR_reference))
FDR[,3] <- as.numeric(FDR[,3])
if (sum(FDR[,3])>0){
  fdr <- which(FDR[,1]==max(FDR[,1][FDR[,3]==1]))
  FDR[,4] <- c(rep("reject null", fdr), rep("do not reject", nrow(FDR)-fdr))}

if (sum(FDR[,3])==0){
  FDR[,4] <- rep("do not reject", nrow(FDR))}

Bonferroni_reference <- rep(.05/nrow(FDR), nrow(FDR)) 
Bonferroni_reject <- ifelse(ordered.ps<=Bonferroni_reference,"reject null", "do not reject")

benefits_punishments <- cbind(benefits_punishments, FDR[,c(2,4)],
                              Bonferroni_reference,
                              Bonferroni_reject) 

names(benefits_punishments)[17] <- "FDR_reject"
benefits_punishments


```
